Chapter 9 HMSC analysis
9.2 Compute variance partitioning
# Select modelchain of interest
load("hmsc_bookdown/fit_model1_250_10.Rdata")
varpart=computeVariancePartitioning(m)
# Compute variance partitioning
varpart=computeVariancePartitioning(m)
varpart$vals %>%
as.data.frame() %>%
rownames_to_column(var="variable") %>%
pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
mutate(variable=factor(variable, levels=c("Elevation","Tree_cover", "Anthropization_cover", "logseqdepth","Random: Sampling_point", "Random: Transect"))) %>%
group_by(variable) %>%
summarise(mean=mean(value)*100,sd=sd(value)*100) %>%
tt()| variable | mean | sd |
|---|---|---|
| Elevation | 13.76615 | 10.766178 |
| Tree_cover | 8.40550 | 5.281159 |
| Anthropization_cover | 12.37289 | 6.324097 |
| logseqdepth | 21.58481 | 13.751217 |
| Random: Sampling_point | 27.27780 | 17.329095 |
| Random: Transect | 16.59284 | 12.592882 |
# Basal tree
varpart_tree <- genome_tree %>%
keep.tip(., tip=m$spNames)
#Varpart table
varpart_table <- varpart$vals %>%
as.data.frame() %>%
rownames_to_column(var="variable") %>%
pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
mutate(variable=factor(variable, levels=rev(c("Elevation","Tree_cover","Anthropization_cover", "logseqdepth","Random: Sampling_point", "Random: Transect")))) %>%
mutate(genome=factor(genome, levels=rev(varpart_tree$tip.label)))
#Phylums
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
filter(genome %in% varpart_tree$tip.label) %>%
arrange(match(genome, varpart_tree$tip.label)) %>%
mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
column_to_rownames(var = "genome") %>%
select(phylum)
colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
filter(genome %in% varpart_tree$tip.label) %>%
arrange(match(genome, varpart_tree$tip.label)) %>%
select(phylum, colors) %>%
unique() %>%
arrange(phylum) %>%
select(colors) %>%
pull()
# Basal ggtree
varpart_tree <- varpart_tree %>%
force.ultrametric(.,method="extend") %>%
ggtree(., size = 0.3)***************************************************************
* Note: *
* force.ultrametric does not include a formal method to *
* ultrametricize a tree & should only be used to coerce *
* a phylogeny that fails is.ultrametric due to rounding -- *
* not as a substitute for formal rate-smoothing methods. *
***************************************************************
# Add phylum colors next to the tree tips
varpart_tree <- gheatmap(varpart_tree, phylum_colors, offset=-0.2, width=0.1, colnames=FALSE) +
scale_fill_manual(values=colors_alphabetic)+
labs(fill="Phylum")
#Reset fill scale to use a different colour profile in the heatmap
varpart_tree <- varpart_tree + new_scale_fill()
# Add variance stacked barplot
vertical_tree <- varpart_tree +
scale_fill_manual(values=c("#34738f","#cccccc","#ed8a45","#b2b530","#be3e2b","#83bb90","#f6de6c", "#122f3d"))+
geom_fruit(
data=varpart_table,
geom=geom_bar,
mapping = aes(x=value, y=genome, fill=variable, group=variable),
pwidth = 2,
offset = 0.05,
width= 1,
orientation="y",
stat="identity",
color = "white",
size=0.1)+
labs(fill="Variable")
vertical_tree# Select desired support threshold
support=0.9
negsupport=1-support
# Basal tree
postestimates_tree <- genome_tree %>%
keep.tip(., tip=m$spNames)
#plotBeta(hM=m, post=getPostEstimate(hM=m, parName="Beta"), param = "Support", plotTree = TRUE, covNamesNumbers=c(1,0))
# Posterior estimate table
post_beta <- getPostEstimate(hM=m, parName="Beta")$support %>%
as.data.frame() %>%
mutate(variable=m$covNames) %>%
pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
mutate(genome=factor(genome, levels=rev(postestimates_tree$tip.label))) %>%
mutate(value = case_when(
value >= support ~ "Positive",
value <= negsupport ~ "Negative",
TRUE ~ "Neutral")) %>%
mutate(value=factor(value, levels=c("Positive","Neutral","Negative"))) %>%
pivot_wider(names_from = variable, values_from = value) %>%
rename(intercept=2,
Elevation=3,
Tree_cover=4,
Anthropization_cover=5,
logseqdepth=6
) %>%
select(genome,intercept,Elevation,Tree_cover,Anthropization_cover,logseqdepth) %>%
column_to_rownames(var="genome")
#Phylums
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
filter(genome %in% postestimates_tree$tip.label) %>%
arrange(match(genome, postestimates_tree$tip.label)) %>%
mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
column_to_rownames(var = "genome") %>%
select(phylum)
colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
filter(genome %in% postestimates_tree$tip.label) %>%
arrange(match(genome, postestimates_tree$tip.label)) %>%
select(phylum, colors) %>%
unique() %>%
arrange(phylum) %>%
select(colors) %>%
pull()
# Basal ggtree
postestimates_tree <- postestimates_tree %>%
force.ultrametric(.,method="extend") %>%
ggtree(., size = 0.3)***************************************************************
* Note: *
* force.ultrametric does not include a formal method to *
* ultrametricize a tree & should only be used to coerce *
* a phylogeny that fails is.ultrametric due to rounding -- *
* not as a substitute for formal rate-smoothing methods. *
***************************************************************
#Add phylum colors next to the tree tips
postestimates_tree <- gheatmap(postestimates_tree, phylum_colors, offset=-0.2, width=0.1, colnames=FALSE) +
scale_fill_manual(values=colors_alphabetic)+
labs(fill="Phylum")
#Reset fill scale to use a different colour profile in the heatmap
postestimates_tree <- postestimates_tree + new_scale_fill()
# Add posterior significant heatmap
postestimates_tree <- gheatmap(postestimates_tree, post_beta, offset=0, width=0.5, colnames=TRUE, colnames_position="top",colnames_angle=90, colnames_offset_y=1, hjust=0) +
scale_fill_manual(values=c("#be3e2b","#f4f4f4","#b2b530"))+
labs(fill="Trend")
postestimates_tree +
vexpand(.25, 1) # expand top#Compute the residual correlation matrix
OmegaCor = computeAssociations(m)
# Reference tree (for sorting genomes)
genome_tree_subset <- genome_tree %>%
keep.tip(., tip=m$spNames)
#Co-occurrence matrix at the animal level
supportLevel = 0.95
toPlot = ((OmegaCor[[1]]$support>supportLevel)
+ (OmegaCor[[1]]$support<(1-supportLevel))>0)*OmegaCor[[1]]$mean
matrix <- toPlot %>%
as.data.frame() %>%
rownames_to_column(var="genome1") %>%
pivot_longer(!genome1, names_to = "genome2", values_to = "cor") %>%
mutate(genome1= factor(genome1, levels=genome_tree_subset$tip.label)) %>%
mutate(genome2= factor(genome2, levels=genome_tree_subset$tip.label)) %>%
ggplot(aes(x = genome1, y = genome2, fill = cor)) +
geom_tile() +
scale_fill_gradient2(low = "#be3e2b",
mid = "#f4f4f4",
high = "#b2b530")+
theme_void()
vtree_top <- genome_tree_subset %>%
force.ultrametric(.,method="extend") %>%
ggtree(layout = "rectangular") +
layout_dendrogram() + # Ensure correct layout
coord_flip()***************************************************************
* Note: *
* force.ultrametric does not include a formal method to *
* ultrametricize a tree & should only be used to coerce *
* a phylogeny that fails is.ultrametric due to rounding -- *
* not as a substitute for formal rate-smoothing methods. *
***************************************************************
***************************************************************
* Note: *
* force.ultrametric does not include a formal method to *
* ultrametricize a tree & should only be used to coerce *
* a phylogeny that fails is.ultrametric due to rounding -- *
* not as a substitute for formal rate-smoothing methods. *
***************************************************************
#create composite figure
grid.arrange(grobs = list(vtree_top,matrix,vtree_left),
layout_matrix = rbind(c(4,1,1,1,1,1,1,1,1,1,1,1),
c(3,2,2,2,2,2,2,2,2,2,2,2),
c(3,2,2,2,2,2,2,2,2,2,2,2),
c(3,2,2,2,2,2,2,2,2,2,2,2),
c(3,2,2,2,2,2,2,2,2,2,2,2),
c(3,2,2,2,2,2,2,2,2,2,2,2),
c(3,2,2,2,2,2,2,2,2,2,2,2),
c(3,2,2,2,2,2,2,2,2,2,2,2),
c(3,2,2,2,2,2,2,2,2,2,2,2),
c(3,2,2,2,2,2,2,2,2,2,2,2),
c(3,2,2,2,2,2,2,2,2,2,2,2),
c(3,2,2,2,2,2,2,2,2,2,2,2)))9.4 Elevation predictions
gradient = seq(940, 2350, by = 100)
gradientlength = length(gradient)
#Treatment-specific gradient predictions
pred_elevation <- constructGradient(m,
focalVariable = "Elevation",
non.focalVariables = list(logseqdepth=list(1),Tree_cover=list(1), Anthropization_cover=list(1)),
ngrid=gradientlength) %>%
predict(m, Gradient = ., expected = TRUE) %>%
do.call(rbind,.) %>%
as.data.frame() %>%
mutate(elevation=rep(gradient,1000)) %>%
pivot_longer(-c(elevation), names_to = "genome", values_to = "value")9.4.1 Responses to elevation
# Select desired support threshold
support=0.9
negsupport=1-support
#Get phylum colors from the EHI standard
phylum_colors <- genome_metadata %>%
left_join(read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv"), by=join_by(phylum == phylum)) %>%
arrange(match(genome, genome_tree$tip.label)) %>%
select(phylum, colors) %>%
unique() %>%
arrange(phylum) %>%
#slice(2:5) %>%
select(colors) %>%
pull()
getPostEstimate(hM=m, parName="Beta")$support %>%
as.data.frame() %>%
mutate(variable=m$covNames) %>%
pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
mutate(trend = case_when(
value >= support ~ "Positive",
value <= negsupport ~ "Negative",
TRUE ~ "Neutral")) %>%
filter(variable=="Elevation") %>%
select(genome,trend) %>%
left_join(pred_elevation, by=join_by(genome==genome)) %>%
group_by(genome, trend, elevation) %>%
summarize(value = mean(value, na.rm = TRUE)) %>%
left_join(genome_metadata, by=join_by(genome == genome)) %>%
ggplot(aes(x=elevation, y=value, group=genome, color=phylum, linetype=trend)) +
geom_line() +
scale_linetype_manual(values=c("solid","dashed","solid")) +
scale_color_manual(values=phylum_colors) +
facet_grid(fct_rev(trend) ~ phylum) +
labs(y="Genome abundance (log)",x="Elevation") +
theme(legend.position = "none") +
theme_minimal() +
theme(legend.position = "none",
axis.text.x = element_text(angle = 45, hjust = 0.8,),
axis.line.x = element_line(size = 0.5, linetype = "solid", colour = "black"),
)9.5 Functional predictions
9.5.1 Element level
elements_table <- genome_gifts_filt %>%
to.elements(., GIFT_db) %>%
as.data.frame()
community_elements <- pred_elevation %>%
group_by(elevation, genome) %>%
mutate(row_id = row_number()) %>%
pivot_wider(names_from = genome, values_from = value) %>%
ungroup() %>%
group_split(row_id) %>%
as.list() %>%
lapply(., FUN = function(x){x %>%
select(-row_id) %>%
column_to_rownames(var = "elevation") %>%
as.data.frame() %>%
exp() %>%
t() %>%
tss() %>%
to.community(elements_table,.,GIFT_db) %>%
as.data.frame() %>%
rownames_to_column(var="elevation")
})
calculate_slope <- function(x) {
lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
coef(lm_fit)[2]
}
element_predictions <- map_dfc(community_elements, function(mat) {
mat %>%
column_to_rownames(var = "elevation") %>%
t() %>%
as.data.frame() %>%
rowwise() %>%
mutate(slope = calculate_slope(c_across(everything()))) %>%
select(slope) }) %>%
t() %>%
as.data.frame() %>%
set_names(colnames(community_elements[[1]])[-1]) %>%
rownames_to_column(var="iteration") %>%
pivot_longer(!iteration, names_to="trait",values_to="value") %>%
group_by(trait) %>%
summarise(mean=mean(value),
p1 = quantile(value, probs = 0.1),
p9 = quantile(value, probs = 0.9),
positive_support = sum(value > 0)/1000,
negative_support = sum(value < 0)/1000) %>%
arrange(-positive_support)# Positively associated
p0<-positive_filtered<-element_predictions %>%
filter(mean >0) %>%
arrange(-positive_support) %>%
filter(positive_support>=0.9)
if (nrow(positive_filtered) > 0) {
positive_filtered %>%
tt() |>
style_tt(
i = which(element_predictions$positive_support < 0.9 & element_predictions$negative_support < 0.1),
background = "#E5D5B1") |>
style_tt(
i = which(element_predictions$negative_support > 0.9 & element_predictions$positive_support < 0.1),
background = "#B7BCCE")
}
#Negatively associated
p1<-negative_filtered<-element_predictions %>%
filter(mean <0) %>%
arrange(-negative_support) %>%
filter(negative_support>=0.9)
if (nrow(negative_filtered) > 0) {
negative_filtered %>%
tt() |>
style_tt(
i = which(element_predictions$positive_support < 0.9 & element_predictions$negative_support < 0.1),
background = "#E5D5B1") |>
style_tt(
i = which(element_predictions$negative_support > 0.9 & element_predictions$positive_support < 0.1),
background = "#B7BCCE")
}| trait | mean | p1 | p9 | positive_support | negative_support |
|---|---|---|---|---|---|
| D0613 | 0.034264683 | 0.0113319085 | 0.060146258 | 0.971 | 0.029 |
| B0106 | 0.017715307 | 0.0039875628 | 0.033388901 | 0.966 | 0.034 |
| D0609 | 0.014654471 | 0.0028010131 | 0.027577745 | 0.962 | 0.038 |
| D0205 | 0.008600377 | 0.0020467751 | 0.015207831 | 0.953 | 0.047 |
| D0207 | 0.031552241 | 0.0069001701 | 0.067737031 | 0.949 | 0.051 |
| B0802 | 0.037714918 | 0.0086104696 | 0.071624679 | 0.947 | 0.053 |
| D0304 | 0.014938085 | 0.0035430830 | 0.027059303 | 0.945 | 0.055 |
| B0214 | 0.019893750 | 0.0026561423 | 0.037824383 | 0.931 | 0.069 |
| B0217 | 0.013929202 | 0.0015844388 | 0.028027942 | 0.925 | 0.075 |
| D0817 | 0.004336948 | 0.0003403666 | 0.008888889 | 0.919 | 0.081 |
| trait | mean | p1 | p9 | positive_support | negative_support |
|---|---|---|---|---|---|
| B0208 | -0.0344477317 | -0.0618098460 | -1.039294e-02 | 0.016 | 0.984 |
| S0104 | -0.0209740208 | -0.0337504690 | -8.006082e-03 | 0.022 | 0.978 |
| D0805 | -0.0004581602 | -0.0009344457 | -6.018417e-05 | 0.035 | 0.965 |
| B0710 | -0.0061948836 | -0.0108430396 | -1.870104e-03 | 0.037 | 0.963 |
| B0310 | -0.0028164249 | -0.0048520436 | -8.904523e-04 | 0.043 | 0.957 |
| B1029 | -0.0003677198 | -0.0007876956 | -4.280235e-05 | 0.045 | 0.955 |
| D0604 | -0.0252076749 | -0.0512963980 | -5.101516e-03 | 0.050 | 0.950 |
| D0903 | -0.0047866601 | -0.0089646580 | -1.108197e-03 | 0.050 | 0.950 |
| B0218 | -0.0116625901 | -0.0223161257 | -2.061981e-03 | 0.053 | 0.947 |
| B0703 | -0.0192156802 | -0.0373477215 | -2.861509e-03 | 0.059 | 0.941 |
| D0206 | -0.0197688096 | -0.0392643553 | -3.454538e-03 | 0.061 | 0.939 |
| D0701 | -0.0051397234 | -0.0101283806 | -6.650890e-04 | 0.068 | 0.932 |
| D0509 | -0.0151983776 | -0.0301644940 | -2.157025e-03 | 0.070 | 0.930 |
| D0103 | -0.0058062776 | -0.0111387262 | -5.611137e-04 | 0.073 | 0.927 |
| B0903 | -0.0040583726 | -0.0082299820 | -3.182127e-04 | 0.078 | 0.922 |
| B0711 | -0.0116387135 | -0.0239457783 | -1.087371e-03 | 0.079 | 0.921 |
| D0307 | -0.0106690266 | -0.0219998777 | -7.160822e-04 | 0.083 | 0.917 |
| B0702 | -0.0164229499 | -0.0340972191 | -1.406339e-03 | 0.085 | 0.915 |
| D0508 | -0.0025923862 | -0.0054092788 | -1.884571e-04 | 0.089 | 0.911 |
| D0908 | -0.0351466114 | -0.0789647021 | -1.049356e-03 | 0.091 | 0.909 |
| D0912 | -0.0009892400 | -0.0020007917 | -1.425600e-05 | 0.092 | 0.908 |
| B0303 | -0.0072114512 | -0.0152002291 | -7.612867e-05 | 0.097 | 0.903 |
#Positively associated
positive <- element_predictions %>%
filter(mean >0) %>%
arrange(mean) %>%
filter(positive_support>=0.9) %>%
select(-negative_support) %>%
rename(support=positive_support)
#Negatively associated
negative <- element_predictions %>%
filter(mean <0) %>%
arrange(mean) %>%
filter(negative_support>=0.9) %>%
select(-positive_support) %>%
rename(support=negative_support)
all_elements <- bind_rows(positive,negative) %>%
left_join(GIFT_db,by=join_by(trait==Code_element)) %>%
mutate(trait=factor(trait,levels=c(rev(positive$trait),rev(negative$trait)))) %>%
mutate(Code_function=factor(Code_function)) %>%
mutate(element_legend=str_c(trait," - ",Element)) %>%
mutate(function_legend=str_c(Code_function," - ",Function)) %>%
select(trait,mean,p1,p9,element_legend,function_legend) %>%
unique()
gift_colors <- read_tsv("data/gift_colors.tsv") %>%
mutate(legend=str_c(Code_function," - ",Function)) %>%
filter(legend %in% all_elements$function_legend)
all_elements %>%
ggplot(aes(x=mean, y=fct_reorder(element_legend, mean), xmin=p1, xmax=p9, color=function_legend)) +
geom_point() +
geom_errorbar() +
xlim(c(-0.15,0.15)) +
geom_vline(xintercept=0) +
scale_color_manual(values = gift_colors$Color) +
theme_minimal() +
labs(x="Regression coefficient",y="Functional trait")community_elements %>%
bind_rows() %>%
pivot_longer(-elevation, names_to = "trait", values_to = "value") %>%
filter(trait %in% positive$trait) %>%
mutate(trait=factor(trait, levels=positive$trait)) %>%
mutate(elevation=as.numeric(elevation)) %>%
ggplot(aes(x=elevation, y=value)) +
geom_smooth(method = lm, formula = y ~ x, se = TRUE) +
#geom_smooth(method = lm, formula = y ~ splines::bs(x, 3), se = TRUE) +
facet_wrap(~trait, ncol=5, scales="free") +
theme_minimal() +
labs(x="Elevation",y="Metabolic Capacity Index")9.5.1.1 GIFT test visualization
# Aggregate bundle-level GIFTs into the compound level
genome_counts_filt_filt <- tibble::rownames_to_column(genome_counts_filt_filt, var = "genome")
GIFTs_elements_filtered <- elements_table[rownames(elements_table) %in% genome_counts_filt_filt$genome, ]
GIFTs_elements_filtered <- as.data.frame(GIFTs_elements_filtered) %>%
select_if(~ !is.numeric(.) || sum(.) != 0)
# Get community-weighed average GIFTs per sample
GIFTs_elements_community <- to.community(GIFTs_elements_filtered, genome_counts_filt_filt %>% column_to_rownames(., "genome") %>% tss(), GIFT_db)
GIFTs_elements_community %>%
as.data.frame() %>%
rownames_to_column(var="sample") %>%
pivot_longer(!sample,names_to="trait",values_to="gift") %>%
left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
mutate(functionid = substr(trait, 1, 3)) %>%
mutate(trait = case_when(
trait %in% GIFT_db$Code_element ~ GIFT_db$Element[match(trait, GIFT_db$Code_element)],
TRUE ~ trait
)) %>%
mutate(functionid = case_when(
functionid %in% GIFT_db$Code_function ~ GIFT_db$Function[match(functionid, GIFT_db$Code_function)],
TRUE ~ functionid
)) %>%
mutate(trait=factor(trait,levels=unique(GIFT_db$Element))) %>%
mutate(functionid=factor(functionid,levels=unique(GIFT_db$Function))) %>%
ggplot(aes(x=sample,y=trait,fill=gift)) +
geom_tile(colour="white", linewidth=0.2)+
scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
facet_grid(functionid ~ Elevation, scales="free",space="free") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
strip.text.y = element_text(angle = 0)) +
labs(y="Traits",x="Samples",fill="GIFT")9.5.2 Functional level
functions_table <- elements_table %>%
to.functions(., GIFT_db) %>%
as.data.frame()
community_functions <- pred_elevation %>%
group_by(elevation, genome) %>%
mutate(row_id = row_number()) %>%
pivot_wider(names_from = genome, values_from = value) %>%
ungroup() %>%
group_split(row_id) %>%
as.list() %>%
lapply(., FUN = function(x){x %>%
select(-row_id) %>%
column_to_rownames(var = "elevation") %>%
as.data.frame() %>%
exp() %>%
t() %>%
tss() %>%
to.community(functions_table,.,GIFT_db) %>%
as.data.frame() %>%
rownames_to_column(var="elevation")
})#max-min option
calculate_slope <- function(x) {
lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
coef(lm_fit)[2]
}
function_predictions <- map_dfc(community_functions, function(mat) {
mat %>%
column_to_rownames(var = "elevation") %>%
t() %>%
as.data.frame() %>%
rowwise() %>%
mutate(slope = calculate_slope(c_across(everything()))) %>%
select(slope) }) %>%
t() %>%
as.data.frame() %>%
set_names(colnames(community_functions[[1]])[-1]) %>%
rownames_to_column(var="iteration") %>%
pivot_longer(!iteration, names_to="trait",values_to="value") %>%
group_by(trait) %>%
summarise(mean=mean(value),
p1 = quantile(value, probs = 0.1),
p9 = quantile(value, probs = 0.9),
positive_support = sum(value > 0)/1000,
negative_support = sum(value < 0)/1000) %>%
arrange(-positive_support)# Positively associated
p2<-function_predictions %>%
filter(mean >0) %>%
arrange(-positive_support) %>%
filter(positive_support>=0.9) %>%
tt() |>
style_tt(
i = which(function_predictions$positive_support < 0.9 & function_predictions$negative_support < 0.1),
background = "#E5D5B1") |>
style_tt(
i = which(function_predictions$negative_support > 0.9 & function_predictions$positive_support < 0.1),
background = "#B7BCCE")
# Negatively associated (there isn't)
#function_predictions %>%
#filter(mean <0) %>%
#arrange(-negative_support) %>%
#filter(negative_support>=0.9) %>%
#tt() |>
#style_tt(
#i = which(function_predictions$positive_support < 0.9 & function_predictions$negative_support < 0.1),
#background = "#E5D5B1") |>
#style_tt(
#i = which(function_predictions$negative_support > 0.9 & function_predictions$positive_support < 0.1),
#background = "#B7BCCE")| trait | mean | p1 | p9 | positive_support | negative_support |
|---|---|---|---|---|---|
| S03 | 0.026906176 | 0.0104593848 | 0.044524683 | 0.973 | 0.027 |
| B04 | 0.018264988 | 0.0051591436 | 0.033137776 | 0.970 | 0.030 |
| D03 | 0.010497822 | 0.0021459296 | 0.019986199 | 0.943 | 0.057 |
| D06 | 0.003285998 | 0.0004532722 | 0.006338923 | 0.933 | 0.067 |
| B07 | 0.012228797 | 0.0004053295 | 0.024691062 | 0.906 | 0.094 |
all_functions <- function_predictions %>%
left_join(GIFT_db,by=join_by(trait==Code_function)) %>%
mutate(trait=factor(trait)) %>%
mutate(function_legend=str_c(trait," - ",Function)) %>%
select(trait,mean,p1,p9,function_legend) %>%
unique()
gift_colors <- read_tsv("data/gift_colors.tsv") %>%
mutate(legend=str_c(Code_function," - ",Function)) %>%
filter(legend %in% all_functions$function_legend)
all_functions %>%
ggplot(aes(x=mean, y=fct_reorder(function_legend, mean), xmin=p1, xmax=p9, color=function_legend)) +
geom_point() +
geom_errorbar() +
xlim(c(-0.05,0.06)) +
geom_vline(xintercept=0) +
scale_color_manual(values = gift_colors$Color) +
theme_minimal() +
labs(x="Regression coefficient",y="Functional trait") +
guides(col = guide_legend(ncol = 1))community_functions %>%
bind_rows() %>%
pivot_longer(-elevation, names_to = "trait", values_to = "value") %>%
filter(trait %in% function_predictions$trait) %>%
mutate(trait=factor(trait, levels=function_predictions$trait)) %>%
mutate(elevation=as.numeric(elevation)) %>%
ggplot(aes(x=elevation, y=value)) +
geom_smooth(method = lm, formula = y ~ x, se = TRUE) +
#geom_smooth(method = lm, formula = y ~ splines::bs(x, 3), se = TRUE) +
facet_wrap(~trait, ncol=5, scales="free") +
theme_minimal() +
labs(x="Elevation",y="Metabolic Capacity Index")9.5.2.1 GIFT test visualization
# Aggregate element-level GIFTs into the function level
GIFTs_functions <- to.functions(GIFTs_elements_filtered, GIFT_db)
functions <- GIFTs_functions %>%
as.data.frame()
# Get community-weighed average GIFTs per sample
GIFTs_functions_community <- to.community(GIFTs_functions, genome_counts_filt_filt %>% column_to_rownames(., "genome") %>% tss(), GIFT_db)
GIFTs_functions_community %>%
as.data.frame() %>%
rownames_to_column(var="sample") %>%
pivot_longer(!sample,names_to="trait",values_to="gift") %>%
left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
ggplot(aes(x=trait,y=sample,fill=gift)) +
geom_tile(colour="white", size=0.2)+
scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
facet_grid(Elevation ~ ., scales="free",space="free")9.5.3 Domain level
domains_table <- functions_table %>%
to.domains(., GIFT_db) %>%
as.data.frame()
community_domains <- pred_elevation %>%
group_by(elevation, genome) %>%
mutate(row_id = row_number()) %>%
pivot_wider(names_from = genome, values_from = value) %>%
ungroup() %>%
group_split(row_id) %>%
as.list() %>%
lapply(., FUN = function(x){x %>%
select(-row_id) %>%
column_to_rownames(var = "elevation") %>%
as.data.frame() %>%
exp() %>%
t() %>%
tss() %>%
to.community(domains_table,.,GIFT_db) %>%
as.data.frame() %>%
rownames_to_column(var="elevation")
})#max-min option
calculate_slope <- function(x) {
lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
coef(lm_fit)[2]
}
domain_predictions <- map_dfc(community_domains, function(mat) {
mat %>%
column_to_rownames(var = "elevation") %>%
t() %>%
as.data.frame() %>%
rowwise() %>%
mutate(slope = calculate_slope(c_across(everything()))) %>%
select(slope) }) %>%
t() %>%
as.data.frame() %>%
set_names(colnames(community_domains[[1]])[-1]) %>%
rownames_to_column(var="iteration") %>%
pivot_longer(!iteration, names_to="trait",values_to="value") %>%
group_by(trait) %>%
summarise(mean=mean(value),
p1 = quantile(value, probs = 0.1),
p9 = quantile(value, probs = 0.9),
positive_support = sum(value > 0)/1000,
negative_support = sum(value < 0)/1000) %>%
arrange(-positive_support)# Positively associated
p3<-positive_filtered<-domain_predictions %>%
filter(mean >0) %>%
arrange(-positive_support) %>%
filter(positive_support>=0.9)
if (nrow(positive_filtered) > 0) {
positive_filtered %>%
tt() |>
style_tt(
i = which(domain_predictions$positive_support < 0.9 & domain_predictions$negative_support < 0.1),
background = "#E5D5B1") |>
style_tt(
i = which(domain_predictions$negative_support > 0.9 & domain_predictions$positive_support < 0.1),
background = "#B7BCCE")
}
# Negatively associated (there isn't)
#domain_predictions %>%
#filter(mean <0) %>%
#arrange(-negative_support) %>%
#filter(negative_support>=0.9) %>%
#tt() |>
#style_tt(
#i = which(domain_predictions$positive_support < 0.9 & domain_predictions$negative_support < 0.1),
#background = "#E5D5B1") |>
#style_tt(
#i = which(domain_predictions$negative_support > 0.9 & domain_predictions$positive_support < 0.1),
#background = "#B7BCCE")| trait | mean | p1 | p9 | positive_support | negative_support |
|---|---|---|---|---|---|
| Overall | 0.006863701 | 0.0003939157 | 0.01509651 | 0.917 | 0.083 |
all_domains <- domain_predictions %>%
left_join(GIFT_db,by=join_by(trait==Code_function)) %>%
mutate(trait=factor(trait)) %>%
mutate(function_legend=str_c(trait," - ",Function)) %>%
select(trait,mean,p1,p9) %>%
unique()
all_domains %>%
ggplot(aes(x=mean, y=fct_reorder(trait, mean), xmin=p1, xmax=p9, color=trait)) +
geom_point() +
geom_errorbar() +
xlim(c(-0.02,0.03)) +
geom_vline(xintercept=0) +
theme_minimal() +
labs(x="Regression coefficient",y="Domain level") +
guides(col = guide_legend(ncol = 1))community_domains %>%
bind_rows() %>%
pivot_longer(-elevation, names_to = "trait", values_to = "value") %>%
filter(trait %in% domain_predictions$trait) %>%
mutate(trait=factor(trait, levels=domain_predictions$trait)) %>%
mutate(elevation=as.numeric(elevation)) %>%
ggplot(aes(x=elevation, y=value)) +
geom_smooth(method = lm, formula = y ~ x, se = TRUE) +
#geom_smooth(method = lm, formula = y ~ splines::bs(x, 3), se = TRUE) +
facet_wrap(~trait, ncol=5, scales="free") +
theme_minimal() +
labs(x="Elevation",y="Metabolic Capacity Index")9.5.3.1 GIFT test visualization
# Aggregate function-level GIFTs into overall Biosynthesis, Degradation and Structural GIFTs
GIFTs_domains <- to.domains(GIFTs_functions, GIFT_db)
domains <- GIFTs_domains %>%
as.data.frame()
# Get community-weighed average GIFTs per sample
GIFTs_domains_community <- to.community(GIFTs_domains, genome_counts_filt_filt %>% column_to_rownames(., "genome") %>% tss(), GIFT_db)
GIFTs_domains_community %>%
as.data.frame() %>%
rownames_to_column(var="sample") %>%
pivot_longer(!sample,names_to="trait",values_to="gift") %>%
left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
ggplot(aes(x=trait,y=sample,fill=gift)) +
geom_tile(colour="white", size=0.2)+
scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
facet_grid(Elevation ~ ., scales="free",space="free")